import requests
from bs4 import BeautifulSoup
import re
import random
"""Simple random-walk crawler over Baidu Baike.

Starts from a seed article and, for up to 20 steps, fetches the current
page, collects every link whose href is a purely percent-encoded /item/
path, follows one of them at random, and backtracks (pops the visit
history) when a page has no such links.
"""
base_url = "https://baike.baidu.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
}
# Alternative seed (2020 coronavirus outbreak article):
# his=["/item/2020%E5%B9%B4%E6%96%B0%E5%9E%8B%E5%86%A0%E7%8A%B6%E7%97%85%E6%AF%92%E7%96%AB%E6%83%85"]
# `his` is the trail of visited article paths; the seed is the
# "web crawler" article.
his = ["/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711"]
# Only follow hrefs that are entirely percent-encoded /item/ paths
# (plain article links). Compiled once, outside the loop.
item_link = re.compile("^/item/(%.{2})+$")

for n in range(20):
    if not his:
        # Every branch dead-ended back past the seed; his[-1] would raise
        # IndexError here, so stop cleanly instead.
        print("no pages left to visit")
        break
    url = base_url + his[-1]
    try:
        # timeout keeps the script from hanging forever on a stalled
        # connection — requests applies no timeout by default.
        res = requests.get(url, headers=headers, timeout=10)
    except Exception as e:
        print(e)
        continue
    res.encoding = "utf-8"
    soup = BeautifulSoup(res.text, "html.parser")
    links = soup.find_all("a", {"href": item_link})
    if links:
        # random.choice is the idiomatic form of random.sample(xs, 1)[0].
        dom = random.choice(links)
        his.append(dom["href"])
        print(n, dom.get_text(), "url:" + his[-1])
    else:
        # Dead end: backtrack to the previous page.
        his.pop()

print(his)